#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2021/4/9 0009 13:05
# @Author : Moyan
# @Site : 
# @File : NcwuSchoolNotifySpider.py
# @Software: PyCharm
import re
import requests

# Listing URL template: page 1 has no suffix, page N (N >= 2) uses "_N".
BASE_URL = "https://www5.ncwu.edu.cn/channels/5{}.html"

# Upper bound on the number of listing pages processed in one run.
MAX_PAGES = 10

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
}

# Patterns are compiled once instead of being looked up on every iteration.
# re.S lets "." span newlines because the fragments cover several HTML lines.
ITEM_PATTERN = re.compile('description">(.*?)<div class="cont">', re.S)
DW_PATTERN = re.compile('class="dw">(.*?)</a>', re.S)
LINK_TEXT_PATTERN = re.compile('.html">(.*?)</a>', re.S)


def build_page_url(page):
    """Return the listing URL for the 1-based page number *page*."""
    suffix = "" if page == 1 else "_{}".format(page)
    return BASE_URL.format(suffix)


def fetch_page(url):
    """Download *url* and return the response body decoded as UTF-8."""
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    return response.content.decode('utf-8')


def find_items(html):
    """Return the raw HTML fragment of every item block found in *html*."""
    return ITEM_PATTERN.findall(html)


def parse_notice(item_html):
    """Return the text to print for one item block, or None if incomplete.

    The result is the text of the first ``class="dw"`` anchor followed by the
    text of the first anchor whose href ends in ``.html``. An item missing
    either fragment yields None instead of raising IndexError.
    """
    dw_match = DW_PATTERN.search(item_html)
    link_match = LINK_TEXT_PATTERN.search(item_html)
    if dw_match is None or link_match is None:
        return None
    return dw_match.group(1) + link_match.group(1)


def main(max_pages=MAX_PAGES):
    """Print each listing URL followed by the notices found on that page.

    Pages are fetched in order starting from 1. The run stops at the first
    page that contains no item blocks, or after *max_pages* pages. Every
    fetched page is printed, including the last one allowed by the limit.
    """
    for page in range(1, max_pages + 1):
        url = build_page_url(page)
        print(url)
        items = find_items(fetch_page(url))
        if not items:
            # An empty page is treated as the end of the listing.
            break
        for item in items:
            notice = parse_notice(item)
            if notice is not None:
                print(notice)


if __name__ == "__main__":
    main()